library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# Load the objects saved by the data-management step; presumably this is where
# `finess_et` (used below) comes from -- TODO confirm against that script.
load("../1_data_management_dplyr/fichiers_prepared.RData")

# Number of establishments per aggregated category.
stats <- finess_et %>%
  group_by(libcategagregET) %>%
  summarise(nb_etablissement = n())

# First bar chart, categories in ggplot2's default order.
g <- ggplot(stats) +
  geom_col(aes(x = libcategagregET, y = nb_etablissement))
g

# Previously tried alternative: stats <- stats %>% arrange(nb_etablissement)
# What is used instead: turn the category into a factor whose levels follow
# increasing counts, so the bars are drawn from smallest to largest.
stats <- stats %>%
  mutate(
    libcategagregET = factor(
      libcategagregET,
      levels = libcategagregET[order(nb_etablissement)]
    )
  )
g <- ggplot(stats) +
  geom_col(aes(x = libcategagregET, y = nb_etablissement))
g

# Same chart with the x-axis labels rotated to the vertical.
g + theme(axis.text.x = element_text(angle = 90))
# Parse the authorisation date with the "%Y-%m-%d" format, then derive its year
# and month. mutate() evaluates its arguments in order, so year()/month() see
# the freshly parsed dateautor.
finess_et <- finess_et %>%
  mutate(
    dateautor = as.Date(dateautor, "%Y-%m-%d"),
    yearautor = year(dateautor),
    monthautor = month(dateautor)
  )

# Distribution of the authorisation years (also reports the number of NA).
summary(finess_et$yearautor)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 151 1978 2005 1991 2017 2042 497
# finess_et%>%filter(yearautor<1900) # bizarre....
# Interactive bar chart of the number of authorisations per year.
# Rows with a year of 1900 or earlier (or a missing year) are left out.
plotly::ggplotly(
  finess_et %>%
    filter(yearautor > 1900) %>%
    # A finer breakdown would add monthautor to the grouping.
    group_by(yearautor) %>%
    summarise(nb_autor = n()) %>%
    ggplot() +
    geom_col(aes(x = yearautor, y = nb_autor))
  # Monthly variant:
  # geom_col(aes(x = paste0(yearautor, monthautor), y = nb_autor))
)
# Alternative based on substr(); beware of rows where the date is not filled in...
# finess_et <- finess_et %>%
#   mutate(yearautor = substr(dateautor, 1, 4))
# Show the first derived years.
head(finess_et[["yearautor"]])
## [1] 1979 1901 1901 1901 1945 1945
Examinons les établissements qui ont reçu leur autorisation en 2017 :
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Pie chart of the establishments authorised in 2017, split by category.
# Reference: https://plot.ly/r/pie-charts/
finess_et %>%
  filter(yearautor == 2017) %>%
  group_by(libcategET) %>%
  summarise(nb_ET = n()) %>%
  plot_ly(type = "pie", labels = ~libcategET, values = ~nb_ET) %>%
  layout(
    title = "Répartition des établissements ayant reçu une autorisation en 2017, par catégorie",
    # Grid, zero line and tick labels are switched off on both axes.
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
Comparons avec les autres années :
# Pie chart of the establishments authorised in any year other than 2017,
# split by category, for comparison with the 2017 chart.
# Rows with a missing yearautor are dropped by filter().
finess_et %>%
  filter(yearautor != 2017) %>%
  group_by(libcategET) %>%
  summarise(nb_ET = n()) %>%
  plot_ly(labels = ~libcategET, values = ~nb_ET, type = "pie") %>%
  layout(
    # The title now matches the filter: this chart covers every year except 2017.
    title = "Répartition des établissements ayant reçu une autorisation hors 2017, par catégorie",
    # Grid, zero line and tick labels are switched off on both axes.
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
Série temporelle par catégorie : on va retenir les 10 catégories les plus fréquentes et tracer l’évolution des ouvertures pour chacune.
# Yearly number of authorisations for the 10 most frequent categories,
# drawn as one line per category.
finess_et %>%
  filter(yearautor > 1900) %>%
  # Step 1: keep only the 10 most frequent libcategET values.
  group_by(libcategET) %>%
  # mutate() instead of summarise(): the group size is attached to every row,
  # like a group_by followed by a merge.
  mutate(volume_categ = n()) %>%
  ungroup() %>%
  arrange(-volume_categ) %>%
  {
    # Inside the braces the piped data is written (.) and not a bare `.`:
    # `. %>% f` would build a function instead of applying f. See
    # https://stackoverflow.com/questions/36716710/combining-pipes-and-the-dot-placeholder-in-r?rq=1
    top10categ <- (.) %>%
      select(libcategET) %>%
      distinct() %>%
      head(10)
    print(top10categ)
    (.) %>% filter(libcategET %in% top10categ$libcategET)
  } %>%
  # Step 2: count the rows per category and per authorisation year.
  group_by(libcategET, yearautor) %>%
  summarise(nb_autorisation = n()) %>%
  plot_ly(
    x = ~yearautor,
    y = ~nb_autorisation,
    color = ~libcategET,
    # Explicit trace type: avoids the "No trace type specified" message.
    type = "scatter",
    mode = "lines",
    # Palette with room for 10 categories; the default Set2 stops at 8 colours
    # and triggers an RColorBrewer warning.
    colors = "Paired"
  )
## # A tibble: 10 x 1
## libcategET
## <chr>
## 1 Pharmacie d'Officine
## 2 Service d'Aide et d'Accompagnement à Domicile (S.A.A.D.)
## 3 Etablissement d'hébergement pour personnes âgées dépendantes
## 4 Laboratoire de Biologie Médicale
## 5 Résidences autonomie
## 6 Centre de Santé
## 7 Service de Soins Infirmiers A Domicile (S.S.I.A.D)
## 8 Service d'Ambulances
## 9 Foyer de Vie pour Adultes Handicapés
## 10 Service d'Éducation Spéciale et de Soins à Domicile
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# https://plot.ly/r/time-series/
Suivi sur les SIRET multi-lignes
Remarque : il faudrait peut-être examiner les 13770 lignes dont le SIRET est manquant (NA).
Sélection du TOP10 des SIRET multi-lignes
# The ten SIRET values carried by the largest number of rows: only SIRET
# present on more than one row are kept, and rows without a SIRET are ignored.
top10_SIRET_multilignes <- finess_et %>%
  group_by(SIRET) %>%
  summarise(nb_ligne_par_SIRET = n()) %>%
  filter(nb_ligne_par_SIRET > 1, !is.na(SIRET)) %>%
  arrange(desc(nb_ligne_par_SIRET)) %>%
  head(10)
Changement de référentiel de coordonnées
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.2-18, (SVN revision 718)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/phileas.condemine/Documents/R/R-3.4.4/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/phileas.condemine/Documents/R/R-3.4.4/library/rgdal/proj
## Linking to sp version: 1.2-7
# Make sure the coordinates are numeric, then drop every row that contains a
# missing value (na.omit() looks at all columns, not only X and Y).
finess_geo <- finess_geo %>%
  mutate(X = as.numeric(X), Y = as.numeric(Y)) %>%
  na.omit()

# Turn the table into a spatial object whose coordinates are the X/Y columns.
coordinates(finess_geo) <- c("X", "Y")
# Source reference system: EPSG:2154 is RGF93 / Lambert-93, not WGS 84.
proj4string(finess_geo) <- CRS("+init=epsg:2154")
# Target reference system: longitude/latitude on the WGS 84 datum.
CRS.new <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
finess_geo <- spTransform(finess_geo, CRS.new)

# Store the reprojected coordinates as plain lon/lat attributes.
finess_geo$lon <- unname(coordinates(finess_geo)[, "X"])
finess_geo$lat <- unname(coordinates(finess_geo)[, "Y"])
Carte :
library(leaflet)
# Map of the establishments that belong to the top-10 multi-row SIRET,
# one circle per establishment, coloured by SIRET.
# The palette domain 1:10 matches the at most ten distinct SIRET kept below.
pal <- colorNumeric(c("red", "green", "blue"), 1:10)
finess_et %>%
  filter(SIRET %in% top10_SIRET_multilignes$SIRET) %>%
  # Attach lon/lat from the geocoded table.
  # A possible extra step here: %>% select(finessET, lon, lat)
  merge(finess_geo, by = "finessET") %>%
  arrange(dateautor) %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~lon,
    lat = ~lat,
    # factor() then as.numeric() maps each SIRET to an integer rank for pal().
    color = ~pal(as.numeric(factor(SIRET))),
    # Explicit separators between the fields: the previous label glued the
    # SIRET value to "finessET" and relied on "\n", which should not render as
    # a line break in the HTML tooltip -- TODO confirm in the rendered map.
    label = ~paste0(
      "SIRET: ", SIRET,
      " | finessET: ", finessET,
      " | dateautor: ", dateautor
    )
    # , clusterOptions = markerClusterOptions()
  )
Pour approfondir l’utilisation de leaflet, une présentation sympa : https://bhaskarvk.github.io/leaflet-talk-rstudioconf-2017/RstudioConf2017.html#28